In [1]:
# Auto-reload edited Python modules (e.g. tissue_tag) without restarting the kernel
%load_ext autoreload
%autoreload 2

Init and load image - Visium tutorial¶

In [27]:
# import git
# import os 
# import getpass
# token = getpass.getpass()
In [28]:
# git.Git('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/').clone('https://nadavyayon:'+token+'@github.com/nadavyayon/jupyter-annotator.git')

You can use my environment, but please do not modify it: conda activate /nfs/team205/ny1/miniconda/miniconda3/envs/imagespot

In [2]:
# Core imports and configuration for the Visium annotation workflow
import os
import numpy as np 
from bokeh.plotting import show, output_notebook
import pandas as pd
import socket
# NOTE(review): chdir-based imports are fragile — consider adding these
# directories to sys.path (or installing the packages) instead
os.chdir('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/tissue-tag/')
import tissue_tag as tt
os.chdir('/nfs/team205/ny1/ThymusSpatialAtlas/ImageSpot/')
import visiumator as vm
import matplotlib.pyplot as plt
import scanpy as sc
output_notebook()  # enable inline Bokeh rendering in this notebook
host = '5011'  # Bokeh server port passed to show(app, notebook_url=...) below
Loading BokehJS ...

Step 1 - Load image and define annotation names and colors¶

In [4]:
# Load the registered Visium image and spot table at 0.5 pixels-per-micron,
# then overlay the spot centres on the image as a sanity check.
plt.figure(figsize=[12,12])

vis_path = '/nfs/team205/ny1/Visium/ThymusAtlasDataSet/set160222/Align_and_detect_output/V11J11-099_Z4-THY-FO3_A1_TA11556493/'
im,ppm,df = tt.read_visium(SpaceRanger_dir_path=vis_path,use_resolution='hires',res_in_ppm=0.5)
# columns 5/4 are presumably the spot pixel x/y coordinates — TODO confirm against tt.read_visium
coordinates = np.vstack((df[5],df[4]))
plt.imshow(im,origin='lower')
plt.plot(coordinates[0,:],coordinates[1,:],'.')
plt.title( 'ppm - '+str(ppm))

# adata = AnnVis[AnnVis.obs['in_tissue']==True]
Out[4]:
<Figure size 864x864 with 0 Axes>
Out[4]:
<matplotlib.image.AxesImage at 0x1480568e5d30>
Out[4]:
[<matplotlib.lines.Line2D at 0x1480568f9250>]
Out[4]:
Text(0.5, 1.0, 'ppm - 0.5')
In [7]:
# Level-0 annotation categories mapped to their display colours.
# Order matters: downstream label images index into list(annodict.values()).
annodict = dict(
    unassigned='yellow',
    Artifacts='black',
    Medulla='green',
    Cortex='cyan',
    Edge='brown',
    HS='magenta',
    Vessels='blue',
    PVS='red',
    Other='orange',
)
In [8]:
# Generate automated labels from marker genes.
# These markers either seed the classifier training (Step 2) or define the
# annotations directly when going the fully manual route.
gene_markers = dict(
    Medulla='AIRE',
    Cortex='ARPP21',
    # HS='IVL',
    # Vessels='CD34',
)

# Number of training spots to sample per marker gene.
labels_per_marker = dict(
    Medulla=50,
    Cortex=150,
    # HS=15,
    # Vessels=15,
)

# Label radius in microns (converted to pixels via ppm where it is used below).
r = 100
In [9]:
# Read raw counts and keep only the barcodes that also appear in the image spot table
adata = sc.read_visium(vis_path,count_file='raw_feature_bc_matrix.h5')
adata = adata[df.index.intersection(adata.obs.index)]
training_labels = tt.background_labels(im.shape[:2],coordinates,every_x_spots = 100,r=r*ppm) # generate background labels 
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
# layer marker-gene-derived labels (Medulla/Cortex) on top of the background labels
training_labels = tt.gene_labels(adata,df,training_labels,gene_markers,annodict,r*ppm,labels_per_marker)
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
out = tt.overlay_lebels(im,training_labels_rgb,alpha=0.5)  # NOTE: 'overlay_lebels' is the library's own spelling
AIRE
ARPP21

Step 2 - (optional) Predict regions from image¶

scribble initial annotations¶

In [10]:
# Open an interactive Bokeh scribbling canvas over the image for manual annotation
p, render_dict = tt.scribbler(im,annodict,plot_scale=1)

def app(doc):
    """Bokeh application entry point: attach the scribbler figure to the document."""
    # `p` is only read here; reading a module-level name needs no `global`
    # declaration, so the original (misleading) `global p` was removed.
    doc.add_root(p)

import socket
# determine if working on a server or local environment: if working on a local
# pc use 'local', while working on a server instance indicate the port number
# e.g. 5011 (string)
show(app,notebook_url=f'{socket.gethostname()}:'+host)
print('wait for it.. can take a few tens of seconds on farm...')
wait for it.. can take a few tens of seconds on farm...

Plot scribble annotations¶

In [11]:
# Rasterise the scribbles into a label image and merge them with the
# marker-derived training labels from the previous step.
line_width = 100 # scribble stroke width in microns
training_labels_scrib = tt.scribble_to_labels(im,render_dict,line_width=line_width*ppm)
training_labels = training_labels + training_labels_scrib
training_labels_rgb = tt.rgb_from_labels(training_labels,colors=list(annodict.values()))
out = tt.overlay_lebels(im,training_labels_rgb,alpha=0.5)
unassigned
Artifacts
Medulla
Cortex
Edge
HS
Vessels
PVS
Other

run the RF classifier¶

In [ ]:
%%time

# Train a random-forest pixel classifier on the training labels and predict a
# label for every pixel of the image.
# this step takes about 30s to 5 min depending on number of training areas and resolution  
result = tt.sk_rf_classifier(im,training_labels)
# plot result 
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.7)
CPU times: user 3min 9s, sys: 21.9 s, total: 3min 31s
Wall time: 53.4 s

Step 3 - Manually draw annotations or Correct forest annotations (Step 2)¶

In [27]:
# at this stage, if you are adding annotations make sure they come after the current ones otherwise it will mess up everything
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5,show=False)
tabs,render_dict = tt.annotator(im,out_img,anno_dict=annodict,fig_downsize_factor=4)

# plotting
def app(doc):
    """Bokeh application entry point: attach the annotator tabs to the document."""
    # `tabs` is only read here; no `global` declaration is required for reads,
    # so the original `global tabs` statement was removed.
    doc.add_root(tabs)

show(app,notebook_url=f'{socket.gethostname()}:'+host)
print('wait for it.. can take a few tens of seconds on farm... (double time of the former plot)')
wait for it.. can take a few tens of seconds on farm... (double time of the former plot)

update annotations¶

In [26]:
# Fold the manual corrections drawn in the annotator back into the label image
out_img, result = tt.update_annotator(im,result,anno_dict=annodict,render_dict=render_dict,alpha=0.5)
unassigned
Cortex
Edge
Other

done? great. not done? return to Step 3 and repeat until happy :)¶

In [28]:
# plot result 
# Re-render the corrected label image over the tissue for a final visual check
result_rgb = tt.rgb_from_labels(result,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5)

save annotation¶

In [32]:
# Persist the finished level-0 annotation image together with its label
# names/colors and the pixels-per-micron scale.
# os.makedirs(..., exist_ok=True) replaces the original exists()+mkdir() pair,
# which was racy (TOCTOU) and checked a different path string than it created.
os.makedirs(vis_path + '/tissue_annotations/', exist_ok=True)

tt.save_annotation(
    folder=vis_path+'/tissue_annotations/',
    label_image=result,
    file_name='annotations_level_0',
    anno_names=list(annodict.keys()),
    anno_colors=list(annodict.values()),
    ppm = ppm,
)

Step 4 - move annotations to anndata¶

load annotations¶

In [5]:
# load tissue annotations from jupyter annotator 
annotations_level_0,annotations_level_0_labels,ppm,annodict = tt.load_annotation(vis_path+'/tissue_annotations/','annotations_level_0',load_colors=True)
# annotations_level_0[np.where(annotations_level_0==9)] = 1
# Re-read the image at the ppm stored with the annotation so the image and
# annotation grids line up
im,ppm,positions = tt.read_visium(SpaceRanger_dir_path=vis_path+'/',use_resolution='hires5K',res_in_ppm=ppm)

result_rgb = tt.rgb_from_labels(annotations_level_0,colors=list(annodict.values()))
out_img = tt.overlay_lebels(im,result_rgb,alpha=0.5)
print(np.unique(annotations_level_0))  # expected label ids: 1..9
loaded annotation image - annotations_level_0 size - (3957, 3886)
loaded annotations
{1: 'unassigned', 2: 'Artifacts', 3: 'Medulla', 4: 'Cortex', 5: 'Edge', 6: 'HS', 7: 'Vessels', 8: 'PVS', 9: 'Other'}
loaded ppm
{'ppm': 0.5}
loaded color annotations
{'unassigned': 'yellow', 'Artifacts': 'black', 'Medulla': 'green', 'Cortex': 'cyan', 'Edge': 'brown', 'HS': 'magenta', 'Vessels': 'blue', 'PVS': 'red', 'Other': 'orange'}
[1 2 3 4 5 6 7 8 9]

Migrate annotations to hi-res grid space¶

In [6]:
%%time
# Rasterise the annotation image onto a uniform hi-res spot grid.
# it's critical not to split cell part and recalculate the df every time you change the dist2cluster resolution - I have an overflow bug somewhere that I need to fix
spot_diameter = 30 # grid spot size in microns; don't go crazy here, avoid under 15um spots 
df = tt.grid_anno(
    im,
    [annotations_level_0],
    ['annotations_level_0'],
    [annotations_level_0_labels],
    spot_diameter=spot_diameter,
    pixels_per_micron=ppm
)
df
generating grid with spot size - 30, with resolution of - 0.5 ppm
annotations_level_0
68640it [00:09, 7331.22it/s]
CPU times: user 9.87 s, sys: 93 ms, total: 9.97 s
Wall time: 9.92 s
Out[6]:
x y annotations_level_0 annotations_level_0_number
index
0 15.0 15.000000 unassigned 1
1 15.0 29.931559 unassigned 1
2 15.0 44.863117 unassigned 1
3 15.0 59.794678 unassigned 1
4 15.0 74.726234 unassigned 1
... ... ... ... ...
68635 3871.0 3889.773682 unassigned 1
68636 3871.0 3904.705322 unassigned 1
68637 3871.0 3919.636963 unassigned 1
68638 3871.0 3934.568359 unassigned 1
68639 3871.0 3949.500000 unassigned 1

68640 rows × 4 columns

In [7]:
# Drop grid spots that fall outside any annotated region ('unassigned').
df = df.loc[df['annotations_level_0'] != 'unassigned']
In [8]:
from scipy.spatial import distance_matrix, distance
# Pairwise Euclidean distance between every pair of remaining grid spots.
# WARNING: this n x n matrix grows quadratically with the number of spots.
xy = np.vstack([df['x'], df['y']]).astype(int).T
distM = distance.cdist(xy, xy, metric='euclidean')
In [9]:
# Compute, for every grid spot, the minimum median distance to each annotation
# cluster, then visualise the annotated grid.
print('calculating distances')
tt.dist2cluster(df,annotation='annotations_level_0',distM=distM,resolution=1) # calculate minimum median distance of each spot to clusters 
tt.plot_grid(df,annotation='annotations_level_0',figsize=[10,10],spotsize=3)
calculating distances
Artifacts
Cortex
Edge
HS
Medulla
Other
PVS
Vessels
Out[9]:
{'Artifacts': array([  0.        , 667.69004785, 682.45512673, ...,  30.        ,
         15.        ,   0.        ]),
 'Cortex': array([339.20200471,  44.55333882,  44.55333882, ...,  15.        ,
         16.55294536,  16.55294536]),
 'Edge': array([310.19026419,   0.        ,   0.        , ...,   0.        ,
          0.        ,  15.        ]),
 'HS': array([923.48741193, 835.88097239, 830.96329642, ...,  94.86832981,
         91.24143795,  90.        ]),
 'Medulla': array([805.76175139, 664.80448254, 653.3215135 , ...,  30.        ,
         33.54101966,  42.42640687]),
 'Other': array([940.39353464, 596.69925423, 598.01755827, ..., 426.39183857,
        432.90299144, 439.82951242]),
 'PVS': array([843.54312279, 824.08858747, 830.04638425, ..., 656.54017394,
        653.46002173, 650.71114944]),
 'Vessels': array([2561.13744262, 2078.71234181, 2071.26289978, ..., 1505.42751403,
        1508.54797736, 1511.81083473])}

move annotation and distances to visium spots¶

In [10]:
# calculate distance matrix between hires and visium spots
# Transfer the grid-level annotations and per-structure distance columns onto
# the original Visium spot positions — presumably by nearest-grid-spot
# assignment; TODO confirm against tt.anno_to_visium_spots
positions = tt.anno_to_visium_spots(
    df_spots=positions,
    df_grid=df,
)
positions
make sure the coordinate systems are alligned e.g. axes are not flipped
migrating - annotations_level_0 to segmentations
migrating - annotations_level_0_number to segmentations
migrating - L2_dist_annotations_level_0_Artifacts to segmentations
migrating - L2_dist_annotations_level_0_Cortex to segmentations
migrating - L2_dist_annotations_level_0_Edge to segmentations
migrating - L2_dist_annotations_level_0_HS to segmentations
migrating - L2_dist_annotations_level_0_Medulla to segmentations
migrating - L2_dist_annotations_level_0_Other to segmentations
migrating - L2_dist_annotations_level_0_PVS to segmentations
migrating - L2_dist_annotations_level_0_Vessels to segmentations
Out[10]:
1 2 3 4 5 annotations_level_0 annotations_level_0_number L2_dist_annotations_level_0_Artifacts L2_dist_annotations_level_0_Cortex L2_dist_annotations_level_0_Edge L2_dist_annotations_level_0_HS L2_dist_annotations_level_0_Medulla L2_dist_annotations_level_0_Other L2_dist_annotations_level_0_PVS L2_dist_annotations_level_0_Vessels
0
GTATTCTTACCGTGCT-1 1.0 7.0 21.0 600.611934 892.788159 Edge 5 139.014388 15.000000 0.00000 239.610100 224.200803 1023.989258 215.127869 2404.819536
GCGGTAACCCAAATGA-1 1.0 7.0 23.0 600.715978 942.575622 Cortex 4 93.920179 0.000000 27.45906 210.011904 195.243438 1033.924562 211.002370 2389.599548
CTACGTGTTGCCACCA-1 1.0 6.0 24.0 557.335251 967.497706 Edge 5 50.089919 15.000000 0.00000 227.589543 214.462118 1081.566457 254.000000 2415.968543
CTAGATAAACTCCTCG-1 1.0 7.0 25.0 600.820021 992.363084 Cortex 4 42.426407 0.000000 45.00000 173.556907 160.589539 1040.721384 209.000000 2362.246600
TCCATTAGTTGGATAG-1 1.0 6.0 26.0 557.439295 1017.285168 Cortex 4 16.552945 0.000000 15.00000 200.179919 186.324985 1079.529990 246.456893 2388.600636
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
CTTGATGACCATCCAG-1 1.0 70.0 98.0 3340.880769 2807.819225 Edge 5 60.000000 26.627054 0.00000 166.721324 79.849859 322.280933 148.600135 950.223658
CTTGCCCACCCACGCA-1 1.0 70.0 100.0 3340.984813 2857.606687 Cortex 4 16.552945 0.000000 15.00000 157.714933 54.120237 355.584027 110.453610 980.041326
GGACAACCATGAAGCC-1 1.0 70.0 102.0 3341.088857 2907.394150 Artifacts 2 0.000000 15.000000 15.00000 174.631040 54.120237 397.555028 93.605555 1019.580796
AGGTGCACGTCCACAT-1 1.0 70.0 104.0 3341.192900 2957.181612 Edge 5 15.000000 26.627054 0.00000 202.625270 66.189123 449.449663 93.920179 1066.723019
ACCAAGTCATCGGCAG-1 1.0 70.0 106.0 3341.296944 3006.969075 Edge 5 33.541020 27.459060 0.00000 227.035240 80.709355 472.128161 111.126055 1099.483515

2958 rows × 15 columns

In [14]:
import scanpy as sc  # NOTE(review): scanpy is already imported in the top imports cell
# Attach the migrated columns (from column 5 onward: annotation label, label
# number, and the L2 distance columns) to the AnnData obs table.
adata_vis = sc.read_h5ad(vis_path+'/Corrected_AnnData.h5ad')
adata_vis.obs = pd.concat([adata_vis.obs,positions.iloc[:,5:]],axis=1)
In [17]:
# Sanity-check: plot the annotation category and selected distance maps over the tissue
sc.pl.spatial(adata_vis,color=['annotations_level_0','L2_dist_annotations_level_0_Edge','L2_dist_annotations_level_0_Medulla','L2_dist_annotations_level_0_HS'],cmap='gist_rainbow',ncols=2)
In [ ]: